# ==============================================================================
# name: RQ3ab-speeding-straightlining.R
# date:	Jan 25, 2022
# author: Bernhard Clemm / Tiago Ventura
# purpose: compare professionals and non-professionals on speeding and straightlining
# ==============================================================================

# NOTE: this script deliberately does not call rm(list = ls()). Wiping the
# global environment deletes the caller's objects but does not reset attached
# packages or options, so it never yields a truly clean state. Run the script
# in a fresh R session instead.

# Project helpers and constants. make_table(), export_kable_quality(), the
# dplyr verbs and stargazer() used below are neither defined nor attached in
# this file -- presumably these two files provide them (confirm).
source("code/utils/functions.R")
source("code/utils/constants.R")

# Response-quality outcome columns that are kept (when present) in each sample.
outcomes <- c(
  "straightliner", "duration",
  "duration_median_30", "duration_median_40", "duration_median_50"
)

# READ IN, FILTER AND BIND DATA ================================================

# Raw analysis files, one per sample (fb = Facebook, lu = Lucid, yg = YouGov).
fb <- read.csv(file = "data/analysis_FB.csv")
lu <- read.csv(file = "data/analysis_LU.csv")
yg <- read.csv(file = "data/analysis_YG.csv")

# Facebook: wave-1 respondents with at least 7 active days. Keep identifiers,
# the weight, every professional_* indicator and the available outcomes;
# missing weights are set to 1.
profs_fb <- fb %>%
  filter(n_days_active >= 7, wave == 1) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    any_of(outcomes)
  ) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  )

# Lucid: same restriction and columns as Facebook, plus the experimental
# variables (norm_treatment, malvol, perpol) used in the regressions below.
profs_lu <- lu %>%
  filter(n_days_active >= 7, wave == 1) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    norm_treatment, malvol, perpol,
    any_of(outcomes)
  ) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  )

# YouGov: respondents with at least 7 active days.
# NOTE(review): unlike Facebook/Lucid there is no wave filter and missing
# weights are not replaced here -- confirm this is intended.
profs_yg <- yg %>%
  filter(n_days_active >= 7) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    any_of(outcomes)
  ) %>%
  mutate(person_id = as.character(person_id))

# HELPERS ======================================================================

# Outcomes passed to make_table() as `outcomes_prop`.
quality_outcomes_prop <- c(
  "straightliner",
  "duration_median_30", "duration_median_40", "duration_median_50"
)

# Outcomes passed to make_table() as `order` (presumably the row order of the
# resulting table -- confirm against make_table()).
quality_outcomes_order <- c(
  "straightliner", "duration",
  "duration_median_30", "duration_median_40", "duration_median_50"
)

# Professionals vs. non-professionals summary for one sample.
#   dt:       analysis data of one sample (profs_fb, profs_lu or profs_yg)
#   var_prof: name of the professionalism indicator column in `dt`
# Returns the result of make_table(); the code below relies on its columns
# `variable`, `pro`, `nonpro` and `sig`.
summarise_quality <- function(dt, var_prof) {
  make_table(
    dt = dt,
    var_prof = var_prof,
    outcomes_median = c("duration"),
    outcomes_prop = quality_outcomes_prop,
    outcomes_mean_raw = NULL, outcomes_mean_01 = NULL,
    order = quality_outcomes_order,
    weight_var = "weight",
    output = "html"
  )
}

# Keep only the columns shown in the final table and prefix the value columns
# with the sample tag (e.g. pro -> lu_pro). Dropping everything else (such as
# `p`) for every sample keeps the join key unambiguous.
#   tab:    one make_table() result
#   prefix: sample tag, one of "fb", "lu", "yg"
tag_sample <- function(tab, prefix) {
  shown <- c("pro", "nonpro", "sig")
  tab %>%
    select(all_of(c("variable", shown))) %>%
    rename_with(~ paste0(prefix, "_", .x), .cols = all_of(shown))
}

# Combine the three per-sample summaries into one display table.
#   fb, lu, yg: make_table() results for Facebook, Lucid and YouGov
# Returns one row per outcome with a printable label (`varname`) followed by
# the pro / sig / nonpro columns of each sample; missing cells become "".
# Rows follow the order of the Lucid table, as it is the left side of the join.
combine_quality_tables <- function(fb, lu, yg) {
  tag_sample(lu, "lu") %>%
    full_join(tag_sample(yg, "yg"), by = "variable") %>%
    full_join(tag_sample(fb, "fb"), by = "variable") %>%
    mutate(varname = case_when(
      variable == "straightliner" ~ "Straightliner (\\%)",
      variable == "duration" ~ "Duration (median seconds) ",
      variable == "duration_median_30" ~ "Duration (\\% 30\\% faster than median)",
      variable == "duration_median_40" ~ "Duration (\\% 40\\% faster than median)",
      variable == "duration_median_50" ~ "Duration (\\% 50\\% faster than median)"
    )) %>%
    mutate(across(everything(), ~ ifelse(is.na(.), "", .))) %>%
    select(
      varname,
      fb_pro, fb_sig, fb_nonpro,
      lu_pro, lu_sig, lu_nonpro,
      yg_pro, yg_sig, yg_nonpro
    )
}

# Percentage by which `pro` is smaller than `nonpro`, rounded to one decimal.
pct_faster <- function(pro, nonpro) {
  round(100 - (pro / nonpro) * 100, 1)
}

# MAIN PAPER ===================================================================

## Table 2: Response quality professionals vs. non-professionals ####

summary_fb <- summarise_quality(profs_fb, "professional_1") # Facebook
summary_lu <- summarise_quality(profs_lu, "professional_1") # Lucid
summary_yg <- summarise_quality(profs_yg, "professional_1") # YouGov

summary_all <- combine_quality_tables(
  fb = summary_fb, lu = summary_lu, yg = summary_yg
)

export_kable_quality(
  dt = summary_all,
  caption = "Response quality of survey professionals vs. non-professionals (professionals = more than 100 survey visits / day)",
  format = "html",
  file = "output/tab2_rq2_comparison_quality.html"
)

## Text descriptives ####

# For the Lucid sample, the median professional is 6.8% faster compared to the
# median non-professional, representing a difference of one minute and
# twenty-six seconds. This difference is yet more pronounced for the YouGov and
# Facebook samples: professionals are 17.2% and 13.4% faster respectively.

# Keep only the median-duration row before converting to numeric, so cells of
# other rows cannot trigger coercion warnings. Any parenthesised part of a
# cell is stripped before the conversion.
duration <- summary_all %>%
  filter(varname == "Duration (median seconds) ") %>%
  select(varname, ends_with("pro")) %>%
  mutate(across(c(ends_with("pro")), ~ as.numeric(gsub("\\(.*?\\)", "", .))))

pct_faster(duration$lu_pro, duration$lu_nonpro)
pct_faster(duration$yg_pro, duration$yg_nonpro)
pct_faster(duration$fb_pro, duration$fb_nonpro)

# SM E.1.1 Alternative professionalism indicators ==============================

## Table E.7: Response quality of survey professionals vs. non-professionals (professionals = more than 50 percent visits to survey sites) ####

summary_fb_2 <- summarise_quality(profs_fb, "professional_2") # Facebook
summary_lu_2 <- summarise_quality(profs_lu, "professional_2") # Lucid
summary_yg_2 <- summarise_quality(profs_yg, "professional_2") # YouGov

summary_all_2 <- combine_quality_tables(
  fb = summary_fb_2, lu = summary_lu_2, yg = summary_yg_2
)

export_kable_quality(
  dt = summary_all_2,
  caption = "Response quality of survey professionals vs. non-professionals (professionals = more than 50 percent visits to survey sites)",
  format = "html",
  file = "output/tabE7_rq2_comparison_quality_2.html"
)

## Table E.8: Response quality of survey professionals vs. non-professionals (professionals = more than 50 percent of browsing time to survey sites) ####

summary_fb_3 <- summarise_quality(profs_fb, "professional_3") # Facebook
summary_lu_3 <- summarise_quality(profs_lu, "professional_3") # Lucid
summary_yg_3 <- summarise_quality(profs_yg, "professional_3") # YouGov

summary_all_3 <- combine_quality_tables(
  fb = summary_fb_3, lu = summary_lu_3, yg = summary_yg_3
)

# The caption previously read "more than 50 of browsing time"; the missing
# "percent" is restored here.
export_kable_quality(
  dt = summary_all_3,
  caption = "Response quality of survey professionals vs. non-professionals (professionals = more than 50 percent of browsing time to survey sites)",
  format = "html",
  file = "output/tabE8_rq2_comparison_quality_3.html"
)

## Table E.9: Response quality of survey professionals vs. non-professionals (professionals = any of the measures) ####

summary_fb_any <- summarise_quality(profs_fb, "professional_all") # Facebook
summary_lu_any <- summarise_quality(profs_lu, "professional_all") # Lucid
summary_yg_any <- summarise_quality(profs_yg, "professional_all") # YouGov

summary_all_any <- combine_quality_tables(
  fb = summary_fb_any, lu = summary_lu_any, yg = summary_yg_any
)

export_kable_quality(
  dt = summary_all_any,
  caption = "Response quality of survey professionals vs. non-professionals (professionals = any of the measures)",
  format = "html",
  file = "output/tabE9_rq2_comparison_quality_any.html"
)

## SM E.3 Treatment effects differences ####

### Table E.10: Effect of openness prime on attribution of malevolence, by professionalism ####

# One linear model per professionalism definition (Lucid sample): malvol
# regressed on the treatment, the professionalism indicator and their
# interaction. Each definition is copied into `professional` so that all four
# models share the same term names.
reg_malvol <- list(
  professional_1 = lm(
    malvol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_1)
  ),
  professional_2 = lm(
    malvol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_2)
  ),
  professional_3 = lm(
    malvol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_3)
  ),
  professional_all = lm(
    malvol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_all)
  )
)

# Write the four models side by side to an HTML table.
stargazer(
  reg_malvol,
  type = "html",
  out = "output/tabE10_rq3_treatment_effects_malvol.html",
  title = "Effect of openness prime on attribution of malevolence, by professionalism",
  dep.var.caption = "Definition of professionalism:",
  dep.var.labels.include = FALSE,
  column.labels = c(
    "> 100 visits/day", "> 50\\% of visits",
    "> 50\\% of time", "Any of the three"
  ),
  covariate.labels = c(
    "Treatment", "Professional",
    "Treatment * Professional", "Constant"
  ),
  omit.stat = c("ser", "f"),
  model.numbers = FALSE,
  header = FALSE,
  digits = 2,
  font.size = "small",
  column.sep.width = "2pt"
)

### Table E.11: Effect of diversity prime on perception of polarization, by professionalism ####

# One linear model per professionalism definition (Lucid sample): perpol
# regressed on the treatment, the professionalism indicator and their
# interaction. Each definition is copied into `professional` so that all four
# models share the same term names.
reg_perpol <- list(
  professional_1 = lm(
    perpol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_1)
  ),
  professional_2 = lm(
    perpol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_2)
  ),
  professional_3 = lm(
    perpol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_3)
  ),
  professional_all = lm(
    perpol ~ norm_treatment * professional,
    data = profs_lu %>% mutate(professional = professional_all)
  )
)

# Write the four models side by side to an HTML table.
stargazer(
  reg_perpol,
  type = "html",
  out = "output/tabE11_rq3_treatment_effects_perpol.html",
  title = "Effect of diversity prime on perception of polarization, by professionalism",
  dep.var.caption = "Definition of professionalism:",
  dep.var.labels.include = FALSE,
  column.labels = c(
    "> 100 visits/day", "> 50\\% of visits",
    "> 50\\% of time", "Any of the three"
  ),
  covariate.labels = c(
    "Treatment", "Professional",
    "Treatment * Professional", "Constant"
  ),
  omit.stat = c("ser", "f"),
  model.numbers = FALSE,
  header = FALSE,
  digits = 2,
  font.size = "small",
  column.sep.width = "2pt"
)
